/* This program creates the innovation index and a concordance to naics4
   to work with the BRDIS.

*/

* ---- Session setup ----
* Start from an empty dataset and an empty matrix workspace.
clear
clear matrix
* Close a log left open by an earlier run; capture suppresses the error when none is open.
capture log close
* Do not pause output waiting for a keypress.
set more off

* Text log for this run; replace overwrites the log from a previous run.
log using "cr_univsys_naics4_indweights_mylog.log", replace

* Directory globals. The values are blank in this copy of the file.
* Only imported_data and inter are referenced in the visible code, where they are
* prefixed directly onto file names.
* NOTE(review): when filled in they presumably must end with a path separator - confirm.
global path 
global p1069  
global lbd 
global lbd_naics 
global xwalks 
global data_from_sas 
global imported_data 
global inter 
global output_data 
global programs 
global out 


* ---- Stage 1: find USPTO classes that appear in only one of the two sources ----

* One row per USPTO class present in the USPTO-to-SIC3 concordance.
use ${imported_data}uspto-sic3-concordance.dta, clear
contract uspto, freq(nobs)
drop nobs
sort uspto
save ${inter}usptogroups.dta, replace

* Working copy of the university and hospital patent file in the inter directory.
use ${imported_data}univandhosppatents.dta, clear
save ${inter}univandhosppatents.dta, replace

* Match patent classes against the concordance classes and keep only the
* unmatched ones: _merge==1 is a class seen only in the patent file,
* _merge==2 is a class seen only in the concordance.
rename nclass_ccl uspto
sort uspto
merge m:1 uspto using ${inter}usptogroups.dta
drop if _merge == 3
contract uspto _merge, freq(nobs)
drop nobs
count
sort _merge uspto
save ${inter}usptostodrop.dta, replace

* Split the unmatched list into its two halves, one file each.
preserve
keep if _merge == 1
drop _merge
sort uspto
save ${inter}todrop1s.dta, replace
restore

keep if _merge == 2
drop _merge
sort uspto
save ${inter}todrop2s.dta, replace

* ---- Stage 2: concordance restricted to usable classes, one row per class ----

use ${imported_data}uspto-sic3-concordance.dta, clear
sort uspto

* Remove every concordance row whose class is on the concordance-only drop list.
merge m:1 uspto using ${inter}todrop2s.dta
keep if _merge != 3
drop _merge

* Consecutive integer id for each remaining class; the chunk loops later in
* this file select patents by this id.
egen sequspto = group(uspto)

* Go from one row per class-by-sic3 pair to one row per class, with a
* mfg_freq and use_freq column for every sic3 value.
reshape wide mfg_freq use_freq, i(uspto sequspto) j(sic3)

sort sequspto
save ${inter}uspto_sic_sequsptosorted_wide.dta, replace

* ---- Stage 3: rank university systems by early patent count ----
* Variable names say pre85 but the cutoff applied here is appyear 1980 or
* earlier, and the output file is named pre80patrank.

use ${inter}univandhosppatents.dta, clear

* Only applications dated 1980 or earlier; rows with missing appyear are
* removed as well because missing compares greater than any number.
drop if appyear > 1980

* Patent count per university system (and hosp flag), stored directly
* under its final name.
contract univsysname univsysnum hosp, freq(pre85_patcount)

* Rank 1 is the largest count; ties are ordered by system name.
gsort -pre85_patcount +univsysname
generate pre85univpatrank = _n

sort univsysnum
save ${inter}pre80patrank.dta, replace

* ---- Stage 4: patent-level file for systems with patents from 1980 or earlier ----

use ${inter}univandhosppatents.dta, clear
sort univsysnum appyear

* Attach the early-period rank and keep only systems that have one.
* FIX: the rank file written earlier in this script is pre80patrank.dta; the
* merge previously pointed at pre85patrank.dta, which this script never creates.
* NOTE(review): m:1 requires univsysnum to be unique in the rank file, which is
* built by univsysname univsysnum hosp - confirm hosp never varies within a system.
merge m:1 univsysnum using ${inter}pre80patrank.dta
keep if _merge==3
drop _merge

* Drop patents whose class has no entry in the concordance.
rename nclass_ccl uspto
sort uspto
merge m:1 uspto using ${inter}todrop1s.dta
drop if _merge==3
drop _merge

* Class id, computed before the year filter below.
* NOTE(review): this id is used as the merge key against the id computed
* independently on the concordance. The two agree only if both files contain
* exactly the same set of classes at this point; the rank filter above could
* remove classes here - confirm, or carry the id over from the concordance.
egen sequspto = group(uspto)

* Keep applications dated 1980 or earlier (missing appyear is dropped too).
drop if appyear>1980

save ${inter}pre80patents.dta, replace

* ---- Stage 5a: expand patents to patent-by-sic3 rows, classes 1 to 200 ----
* Each class is processed separately and written to its own temporary file.

set more off
forvalues x = 1/200 {
   * Patents of one class, joined to that class's wide row of sic3 frequencies.
   use ${inter}pre80patents.dta, clear
   keep if sequspto==`x'
   merge m:1 sequspto using ${inter}uspto_sic_sequsptosorted_wide.dta
   tab _merge
   keep if _merge==3
   drop _merge
   * Back to long form: one row per patent and sic3.
   reshape long mfg_freq use_freq, i(univsysname univsysnum patent uspto sequspto) j(sic3)
   * FIX: the rank variable is pre85univpatrank; this list previously named
   * pre80univpatrank, which does not exist, so keep stopped with an error.
   keep univsysname univsysnum pre85univpatrank sic3 mfg_freq use_freq patent uspto sequspto
   save ${inter}patents`x'.dta, replace
}

* The class 200 data are still in memory after the loop, so only classes
* 1 to 199 need to be appended from disk.
forvalues x = 1/199 {
   append using ${inter}patents`x'.dta
}
save ${inter}partial_indweights.dta, replace

* Remove the per-class temporary files for this range.
forvalues x = 1/200 {
   rm ${inter}patents`x'.dta
}

* ---- Stage 5b: classes 201 to 372, then collapse to system-by-sic3 weights ----

forvalues x = 201/372 {
   * Patents of one class, joined to that class's wide row of sic3 frequencies.
   use ${inter}pre80patents.dta, clear
   keep if sequspto==`x'
   merge m:1 sequspto using ${inter}uspto_sic_sequsptosorted_wide.dta
   tab _merge
   keep if _merge==3
   drop _merge
   * Back to long form: one row per patent and sic3.
   reshape long mfg_freq use_freq, i(univsysname univsysnum patent uspto sequspto) j(sic3)
   keep univsysname univsysnum pre85univpatrank sic3 mfg_freq use_freq patent uspto sequspto
   save ${inter}patents`x'.dta, replace
}

* Reload the classes 1 to 200 stack. This replaces whatever is in memory,
* including the class 372 data left there by the loop.
* FIX: append through 372, not 371. The class 372 file was previously never
* appended, so its patents were missing from the collapsed weights.
use ${inter}partial_indweights.dta, clear
forvalues x = 201/372 {
   append using ${inter}patents`x'.dta
}

* Sum the manufacture and use frequencies within system and sic3; the rank is
* carried through as a mean.
collapse (sum) mfg_freq (sum) use_freq (mean) pre85univpatrank, by(univsysname univsysnum sic3)

sort univsysname univsysnum sic3
save ${inter}univsys_level_indweights.dta, replace



* ---- Cleanup: delete intermediate files and close the log ----

* FIX: the loop that writes these files runs through 372, so remove through
* 372; stopping at 371 left patents372.dta behind in the inter directory.
forvalues x = 201/372 {
   rm ${inter}patents`x'.dta
}
rm ${inter}usptogroups.dta
rm ${inter}univandhosppatents.dta
rm ${inter}usptostodrop.dta
rm ${inter}todrop1s.dta
rm ${inter}todrop2s.dta
rm ${inter}uspto_sic_sequsptosorted_wide.dta
rm ${inter}pre80patrank.dta
rm ${inter}pre80patents.dta
rm ${inter}partial_indweights.dta

log close














 